package KnowSparkSQL.DSL.DatasetMethods;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.List;
import java.util.function.Consumer;

public class DatasetToViewData {
    public static void main(String[] args) {
        /**
         * 获取DataSet<Row>若干行数据
         *
         * DataSet<Row>的show()方法的作用是格式化显示数据。不传参，默认显示前20条，传参可以指定显示前N条。
         */
        SparkSession ss = SparkSession.builder().appName("CreateDataset3").master("local").getOrCreate();

        Dataset<Row> people = ss.read().json("./data/people.json");

        Row first = people.first();
        System.out.println(first);

        // 获取前 n 行 返回一个list对象
        List<Row> rows = people.takeAsList(2);
        for (Row l : rows){
            System.out.println(l);
        }
        // 获取所有行 返回一个list对象
        List<Row> collectAsList = people.collectAsList();



        collectAsList.forEach(new Consumer<Row>() {
            @Override
            public void accept(Row row) {
                System.out.println(row);
            }
        });

        people.printSchema();

    }
}
